#!/bin/bash
#
# Prepare per-word experiment directories.
#
# Usage: <script> LANGUAGES WORDSFILE
#   LANGUAGES  suffix used to build the directory names
#              Test-en-LANGUAGES (output) and Regexs-en-LANGUAGES (input),
#              both resolved relative to the directory the script is run from.
#   WORDSFILE  text file with one word per line.
#
# For every word W a directory Test-en-LANGUAGES/W is created, the regex
# folder is copied into it, and text2sval.pl, preprocess.pl and maketarget.pl
# are run there. W-td.txt is read from inside that directory.
# A word whose directory cannot be created/entered is skipped with a message
# on stderr; failures of the individual tools do not stop the loop.

# Both arguments are required: without WORDSFILE there is nothing to iterate.
if [ $# -lt 2 ]; then
	echo "Usage: $0 LANGUAGES WORDSFILE" >&2
	exit 1
fi

# Exported so the external tools see it too (presumably the reason it is set
# at all -- nothing in this script itself depends on the locale).
export LANG=C
LANGUAGES=$1
WORDS_FILE=$2

if [ ! -r "$WORDS_FILE" ]; then
	echo "Cannot read words file: $WORDS_FILE" >&2
	exit 1
fi

# Absolute base path, so the loop does not depend on the current directory.
expr_path=$(pwd)

# -r keeps backslashes literal; the "|| [ -n ... ]" part also processes a last
# line that has no trailing newline. Default IFS is kept on purpose so that
# surrounding whitespace is trimmed from each word.
while read -r WORD || [ -n "$WORD" ]
do
	# An empty word would make WORD_DIR the shared Test-en-* directory itself.
	[ -n "$WORD" ] || continue

	echo "$WORD"
	WORD_DIR=$expr_path/Test-en-$LANGUAGES/$WORD

	# Never run the mv/rm steps below outside the word's own directory.
	if ! mkdir -p -- "$WORD_DIR" || ! cd -- "$WORD_DIR"; then
		echo "Cannot enter $WORD_DIR, skipping $WORD" >&2
		continue
	fi

	# PREPARE DATA

	# Copy Regexs folder into $WORD_DIR
	# NOTE(review): if ./Regexs already exists (re-run), cp -r nests the
	# source folder inside it instead of replacing it -- confirm intended.
	cp -r -- "$expr_path/Regexs-en-$LANGUAGES" ./Regexs

	echo "Preparing data for experiment execution"

	# Convert plain text to sval2 format
	text2sval.pl "$WORD-td.txt" > "$WORD-corpus.xml"
	#rm $WORD-td.txt

	# Divide corpus into training and test parts
	preprocess.pl --token Regexs/token.regex --removeNotToken --split 95 "$WORD-corpus.xml"

	# Create the target.regex
	maketarget.pl -head LEXELT-test.xml
	mv target.regex Regexs/target.regex

	# Remove unused files
	rm -- LEXELT-test.xml.* LEXELT-test.count
	rm -- LEXELT-training.xml

	# Return to the base directory (all paths above are absolute anyway).
	cd -- "$expr_path" || exit 1
done < "$WORDS_FILE"